## Multivariate resemblance ##

# Packages used by this script.
library(vegan)
library(permute)
library(simba)    # on load: 'mad' is reported as masked from 'package:stats'
library(cluster)
library(ecodist)  # on load: 'mantel' is reported as masked from 'package:vegan'

# The working directory must be the folder that holds the CSV files read below
# (it was originally changed by hand from the GUI 'Misc' menu). Print it so the
# location can be checked.
getwd()

# data.trans() is expected to come from the biostats source file, which was
# originally loaded by hand from the GUI 'File' menu. Fail early with a clear
# message if that manual step was skipped.
if (!exists("data.trans", mode = "function")) {
  stop(
    "data.trans() not found: source the biostats file before running this script",
    call. = FALSE
  )
}

# Load the environmental, species-abundance and species-trait tables.
# The first CSV column of each file supplies the row names.
envdata  <- read.csv("env.csv", header = TRUE, row.names = 1)
speabu   <- read.csv("abund.csv", header = TRUE, row.names = 1)
spetrait <- read.csv("trait.csv", header = TRUE, row.names = 1)

## Coefficients of similarity for binary (presence/absence) data ##

# Turn species abundances into presence/absence using the power transformation
# with exponent 0.
# REMEMBER: the first column of speabu is text, so drop it (i.e. [, -1]).
speocc <- data.trans(speabu[, -1], method = "power", exp = 0, plot = FALSE)

# Jaccard similarity coefficients
sp.jac <- sim(speocc, method = "jaccard")
# look at it
sp.jac

# Now try a few different similarity matrices
sp.sim <- sim(speocc, method = "simplematching")
sp.sor <- sim(speocc, method = "soerensen")

# How do two types of coefficients compare to each other? Let's plot them.
plot(sp.jac, sp.sim,
     xlab = "Jaccard's coefficient", ylab = "Simple Matching coefficient")
# add a 1:1 line
abline(0, 1, col = "darkgray")

# Same comparison for the first 45 entries only (intended as "row 1 versus
# everyone else" for both coefficients).
# type = "n" sets up the axes without drawing points; text() then draws a label
# at each position and abline() adds the 1:1 line.
plot(sp.jac[1:45], sp.sim[1:45],
     xlab = "Jaccard's coefficient", ylab = "Simple matching coefficient",
     type = "n")
# text() takes its labels through 'labels'; it has no 'row.names' argument.
# NOTE(review): assumes sim() returned a 'dist' object over 46 rows, so that
# entries 1:45 pair row 1 with rows 2..46 - confirm against the data.
text(sp.jac[1:45], sp.sim[1:45], labels = row.names(speocc)[2:46])
abline(0, 1, col = "darkgray")

# Bray-Curtis dissimilarity on the abundances (text column dropped)
sp.bray <- vegdist(speabu[, -1], method = "bray")

# Distance matrix based on Jaccard's coefficient
sp.jacd <- vegdist(speocc, method = "jaccard")

# Jaccard similarity against 1 - Jaccard distance
plot(sp.jac, 1 - sp.jacd)

## Coefficients of similarity for mixed data types ##

# Look at the structure of the trait data
str(spetrait)

# Gower's coefficient via daisy() from the cluster package.
# daisy() returns dissimilarities, not similarities.
sptr.gower <- daisy(spetrait, metric = "gower")

## Coefficients of (dis)similarity for continuous data ##

# Environmental dissimilarity by Euclidean distance (most common) and by
# Manhattan distance
env.euc <- vegdist(envdata, method = "euclidean")
env.man <- vegdist(envdata, method = "manhattan")

# Plot the two distances against each other
plot(env.euc, env.man, xlab = "euclid", ylab = "manhat")
abline(0, 1)

# Convert correlation to a distance. cor() on a data frame correlates its
# columns, so this is a matrix of distances between the variables of envdata.
env.dis <- sqrt(2 - 2 * cor(envdata))

## Exercise ##

# Read in the file of 31 genes with expression values for many developmental
# stages
oys.exp <- read.csv("31oystergeneexp.csv", header = TRUE, row.names = 1)

# Similarity coefficients
# NOTE(review): sim() is used above on presence/absence data, while oys.exp
# holds expression values - confirm how sim() treats non-binary input, or
# binarise with data.trans() first.
oys.sim <- sim(oys.exp, method = "simplematching")
oys.sor <- sim(oys.exp, method = "soerensen")

# Plot them - something is weird here, is it the zero thing?
# Simple matching against Soerensen similarity for the oyster data, with a
# 1:1 reference line. Each call sits on its own line: two calls on one line
# without a separator do not parse.
plot(oys.sim, oys.sor, xlab = "simple matching", ylab = "sorensen")
abline(0, 1, col = "darkgray")

# Dissimilarity (Bray-Curtis)
oys.bray <- vegdist(oys.exp, method = "bray")

# Distances (Euclidean and Manhattan)
oys.euc <- vegdist(oys.exp, method = "euclidean")
oys.man <- vegdist(oys.exp, method = "manhattan")

# Plot the two distances against each other
plot(oys.euc, oys.man, xlab = "euclid", ylab = "manhat")
abline(0, 1, col = "darkgray")

# Log-transform the data and calculate the Euclidean distance again.
# data.trans() comes from the separately sourced biostats file.
oyslog <- data.trans(oys.exp, method = "log", plot = FALSE)
oys.euc.log <- vegdist(oyslog, method = "euclidean")

# Raw against log-transformed Euclidean distance
plot(oys.euc, oys.euc.log, xlab = "euclid", ylab = "euclid transform log")
abline(0, 1, col = "darkgray")